// Ulrich J. Eberle, J. Vernon Henderson, Dominic Rohner, and Kurt Schmidheiny
// Ethno-Linguistic Diversity and Urban Agglomeration
// Forthcoming in Proceedings of the National Academy of Sciences, 2020

// Replication file (STATA)

// Install packages
// ssc install estout, replace
// ssc install reghdfe, replace
// ssc install ftools, replace
// ssc install coefplot, replace
// ssc install binscatter, replace

version 15.0

// Session setup: switch off output paging, then empty the data in memory
set more off
clear

// Load the analysis dataset (looked up in the current working directory)
use ethnicurbanization.dta

/************************************ A. Main Article ************************************/


/**************************************** Tables *****************************************/

* Table 1. Ethno-linguistic Fractionalization and Urbanization Patterns
// Writes Table_1.csv in two passes: Panel A (cross-sectional) replaces the file and
// Panel B (longitudinal: adds the 1975 value of the outcome as a regressor) is appended.
// Every model absorbs country_id and clusters standard errors on country_id.
// Column order within each panel:
//   (1)-(2) urban share, without / with controls
//   (3)-(4) primate share, full sample, without / with controls
//   (5)-(6) primate share, observations with non-missing primateshare_1975, without / with controls
forvalues panel = 1/2 {
	// Export settings that differ between the two panels
	if `panel' == 1 {
		local keepvars *frac*
		local filemode replace
	}
	else {
		local keepvars *frac* *urb* *primate*
		local filemode append
	}

	foreach dep in urbanshare primateshare {
		// The lagged outcome enters only in Panel B
		local lagdep
		if `panel' == 2 {
			local lagdep `dep'_1975
		}

		// Urban share is estimated on the full sample only;
		// primate share is estimated on the full and on the restricted sample
		local samples full
		if "`dep'" == "primateshare" {
			local samples full restricted
		}

		foreach smp of local samples {
			local restrict
			if "`smp'" == "restricted" {
				local restrict if  primateshare_1975!=.
			}

			// 0 = without controls, 1 = with ruggedness and 1975 population density
			foreach withctrl of numlist 0/1 {
				local ctrls
				if `withctrl' == 1 {
					local ctrls ruggedness popdensity_1975
				}

				eststo: reghdfe `dep'_2015 frac15_1975 `lagdep' `ctrls' `restrict', absorb(country_id) vce(cluster country_id)
					// Footer rows: observation count, cluster count, and Yes-flags
					qui estadd local Provinces = e(N)
					qui estadd local Countries = e(N_clust)
					qui estadd local Country_FE "Yes"
				if `withctrl' == 1 {
					qui estadd local Ruggedness "Yes"
					qui estadd local Popdensity1975 "Yes"
				}
			}
		}
	}

	// Export the six stored models of this panel, then reset the estimates list
	esttab using Table_1.csv, stats(r2_a Provinces Countries Country_FE Ruggedness Popdensity1975, fmt(3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) keep(`keepvars') `filemode'
	eststo clear
}

* Table 2. Policy Implications: The Role of Democracy
// Writes Table_2.csv: Panel A (cross-sectional) replaces the file, Panel B (longitudinal,
// with the 1975 value of the outcome as a regressor) is appended.
// Column order within each panel: Polity 4 (urban share, primate share),
// then Freedom House (urban share, primate share).
forvalues panel = 1/2 {
	// Export settings that differ between the two panels
	if `panel' == 1 {
		local keepvars *frac15*
		local filemode replace
	}
	else {
		local keepvars *frac15* *urb* *primate*
		local filemode append
	}

	foreach source in polity4 freedomhouse {
		// Regime variables that are multiplied with fractionalization
		// NOTE(review): presumably 0/1 regime-category indicators -- confirm against the codebook
		if "`source'" == "polity4" {
			local d_aut polityM033_1975
			local d_int polityM3466_1975
			local d_dem polityM67100_1975
		}
		else {
			local d_aut fh_statusDnotfree_1975
			local d_int fh_statusDpartfree_1975
			local d_dem fh_statusDfree_1975
		}

		foreach dep in urbanshare primateshare {
			// The lagged outcome enters only in Panel B
			local lagdep
			if `panel' == 2 {
				local lagdep `dep'_1975
			}

			// Interaction terms; fixed names so both regime sources share the same table rows
			qui gen frac15_autocracy_1975 = frac15_1975 * `d_aut'
			qui gen frac15_intregime_1975 = frac15_1975 * `d_int'
			qui gen frac15_democracy_1975 = frac15_1975 * `d_dem'

			eststo: reghdfe `dep'_2015 frac15_democracy_1975 frac15_intregime_1975 frac15_autocracy_1975 ruggedness popdensity_1975 `lagdep' if `dep'_1975!=., absorb(country_id) vce(cluster country_id)
				// p-values of pairwise equality tests between the three interaction coefficients
				qui test frac15_democracy_1975 = frac15_intregime_1975
				qui estadd local Ptest1 = round( r(p), .001)
				qui test frac15_intregime_1975 = frac15_autocracy_1975
				qui estadd local Ptest2 = round( r(p), .001)
				qui test frac15_democracy_1975 = frac15_autocracy_1975
				qui estadd local Ptest3 = round( r(p), .001)
				// Footer rows
				qui estadd local Provinces = e(N)
				qui estadd local Countries = e(N_clust)
				qui estadd local Country_FE "Yes"
				qui estadd local Ruggedness "Yes"
				qui estadd local Popdensity1975 "Yes"

			// Remove the interactions so they can be regenerated for the next model
			qui drop frac15_autocracy_1975 frac15_intregime_1975 frac15_democracy_1975
		}
	}

	esttab using Table_2.csv, stats(r2_a Ptest1 Ptest2 Ptest3 Provinces Countries Country_FE Ruggedness Popdensity1975, fmt(3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) `filemode' keep(`keepvars')
	eststo clear
}

/****************************************  Figures ****************************************/

* Figure 1: Global Map of Ethno-linguistic Fractionalization at the Province Level

// Created in ArcGIS

* Figure 2: Distributions and Regressions: Ethno-linguistic Fractionalization, Conflict and Urban Concentration
// One local-polynomial plot (degree 1, with confidence band, no scatter) per panel.
// Each panel is saved as Figure_2<letter>.gph and exported as Figure_2<letter>.pdf.
foreach panel in A B C {
	if "`panel'" == "A" {
		// 2A. Conflict
		local yvar   conflictBinary_1975_2015_demean
		local ylab   "Conflict, demeaned"
		local yrng   -.4 .2
		local yticks -.4[.2].2
		local head   "{bf:A.} Conflict"
	}
	if "`panel'" == "B" {
		// 2B. Urban share
		local yvar   urbanshare_2015_demean
		local ylab   "Urban share, demeaned"
		local yrng   -.6 .4
		local yticks -.6[.2].4
		local head   "{bf:B.} Urban population share"
	}
	if "`panel'" == "C" {
		// 2C. Primate share
		local yvar   primateshare_2015_demean
		local ylab   "Primate share, demeaned"
		local yrng   -.6 .4
		local yticks -.6[.2].4
		local head   "{bf:C.} Primate city share"
	}

	lpoly `yvar' frac15_1975_demean, ///
		degree(1) ci noscatter legend(on region(col(white))  position(6) ring(0)) ///
		lcolor(red) xtitle("Fractionalization (level 15), demeaned") ytitle("`ylab'") ///
		xtitle(, size(large)) ytitle(, size(large)) xlabel(,labsize(medlarge)) ylabel(,labsize(medlarge)) ///
		graphregion(color(white)) bgcolor(white) xscale(range(-1 1)) yscale(range(`yrng')) ylabel(`yticks') title("`head'", color(black)) note("")
	graph save Figure_2`panel', replace
	graph export Figure_2`panel'.pdf, replace
}

// Combine Figure 2: stack the three saved panels in a single column
graph combine Figure_2A.gph Figure_2B.gph Figure_2C.gph, cols(1) iscale(.7273) ysize(12) graphregion(margin(zero))
graph export Figure_2.pdf, replace

* Figure 3: The Use of Ethnologue Language Trees:  Illustration for the Indian Province Himachal Pradesh.
// Manually generated based on data from the Ethnologue website

* Figure 4: Ethno-linguistic Fractionalization, Conflict and Urban Concentration: Results for Different Aggregation Levels
// For each outcome, regress it on every variable in the range frac1_1975 - frac15_1975
// (one regression per variable, each stored under the variable's own name) and plot the
// fractionalization coefficients with 95% intervals against the aggregation level.
// NOTE(review): scheme(gap_axes) is not among the packages listed at the top of the file;
// presumably a user-installed scheme -- confirm it is available before running.
foreach panel in A B C {
	if "`panel'" == "A" {
		local outcome urbanshare_2015
		local lagdep  urbanshare_1975
		local heading "{bf:A.} Urban population share"
	}
	if "`panel'" == "B" {
		local outcome primateshare_2015
		local lagdep  primateshare_1975
		local heading "{bf:B.} Primate city share"
	}
	if "`panel'" == "C" {
		// No lagged outcome is included for the conflict indicator
		local outcome conflictBinary_1975_2015
		local lagdep
		local heading "{bf:C.} Conflict"
	}

	// The variable range follows the column order of the dataset
	foreach fracvar of varlist frac1_1975 - frac15_1975 {
		qui eststo `fracvar': reghdfe `outcome' `fracvar' ruggedness popdensity_1975 `lagdep', absorb(country_id) vce(cluster country_id)
	}

	coefplot (frac1_1975 \ frac2_1975 \ frac3_1975 \ frac4_1975 \ frac5_1975 \ ///
		frac6_1975 \ frac7_1975 \ frac8_1975 \ frac9_1975 \ frac10_1975 \ ///
		frac11_1975 \ frac12_1975 \ frac13_1975 \ frac14_1975 \ frac15_1975), ///
		keep(frac*) vertical ///
		coeflabels(frac1_1975 = "1" frac2_1975 = "2" frac3_1975 = "3" frac4_1975 = "4" frac5_1975 = "5" ///
		frac6_1975 = "6" frac7_1975 = "7" frac8_1975 = "8" frac9_1975 = "9" frac10_1975 = "10" ///
		frac11_1975 = "11" frac12_1975 = "12" frac13_1975 = "13" frac14_1975 = "14" frac15_1975 = "15") ///
		levels (95) ciopts(recast(rcap)) yline(0) ///
		xtitle("Aggregation level") ytitle("Regression coefficient") ylabel(-0.2(0.2)0.3, gmax) ///
		scheme(gap_axes) graphregion(color(white) margin(0 0 0 1)) bgcolor(white) xtitle(, size(large)) ytitle(, size(large)) xlabel(,labsize(large)) ylabel(,labsize(large)) title("`heading'", color(black)) note("")

	// Save the panel under its letter, then drop the stored estimates before the next outcome
	qui graph save Figure_4`panel', replace
	qui graph export Figure_4`panel'.pdf, replace
	eststo clear
}
// Combine Figure 4: stack the three saved panels in a single column
graph combine Figure_4A.gph Figure_4B.gph Figure_4C.gph, cols(1) iscale(.7273) imargin(4 4 4 4) ysize(12) graphregion(margin(zero))
graph export Figure_4.pdf, replace

/****************************** B. Supplementary Information ******************************/

/***************************************** Tables ****************************************/

* Table S1. Descriptive summary statistics of main variables
// Each variable group is summarized separately (listwise within the group) and written to
// Table_S1.csv; the first group replaces the file, the remaining groups are appended.
// Groups, in order: urban share outcome; primate share outcome; ethnicity indices;
// control variables; 1975 primate share.
local layout "count(fmt(0)) mean(fmt(2)) sd(fmt(2)) min(fmt(2)) max(fmt(2))"
local filemode replace
foreach grp in "urbanshare_2015" ///
	"primateshare_2015" ///
	"frac1_1975 frac8_1975 frac15_1975 pol1_1975 pol8_1975 pol15_1975" ///
	"ruggedness popdensity_1975 urbanshare_1975" ///
	"primateshare_1975" {
	estpost summarize `grp', listwise
	esttab using Table_S1.csv, `filemode' noobs nonumber cells("`layout'")
	// Every group after the first is appended
	local filemode append
}
eststo clear

* Table S2. Robustness to Alternative Control Variables
// Writes Table_S2.csv: Panel A (cross-sectional) replaces the file, Panel B (longitudinal,
// with the 1975 value of the outcome as a regressor) is appended.
// Column order within each panel: urban share with control sets 1-4, then primate share
// with control sets 1-4. Each sample is restricted to non-missing 1975 values of the outcome.

// Alternative control sets, indexed 1-4
local ctrlprov1 ruggedness popdensity_1975
local ctrlprov2 popdensity_1975
local ctrlprov3 ruggedness popdensity_1975 popdensity_1975_sq popdensity_1975_cubic
local ctrlprov4 ruggedness popdensity_1975 dist_to_coast elevation latitude capital_in_province conflictEvents_1946_1974 geconppp_1990
forvalues panel = 1/2 {
	local filemode replace
	if `panel' == 2 {
		local filemode append
	}

	foreach dep in urbanshare primateshare {
		// The lagged outcome enters only in Panel B
		local lagdep
		if `panel' == 2 {
			local lagdep `dep'_1975
		}

		forvalues x = 1/4 {
			eststo: reghdfe `dep'_2015 frac15_1975 `ctrlprov`x'' `lagdep' if `dep'_1975!=., absorb(country_id) vce(cluster country_id)
				qui estadd local Provinces = e(N)
				qui estadd local Countries = e(N_clust)
				qui estadd local Country_FE "Yes"
		}
	}

	// se(%9.3f) makes esttab print standard errors in parentheses; without it esttab
	// prints t statistics, unlike the se() format requested for Table_1.csv and Table_2.csv.
	esttab using Table_S2.csv, stats(r2_a Provinces Countries Country_FE, fmt(3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) drop(_cons) order(*frac* *rugged* *popdens* *coast* elevation *latitude* *capital* conflictEvents_1946_1974) `filemode'
	eststo clear
}

* Table S3. Ethno-linguistic Polarization and Urbanization Patterns
// Same layout as Table 1 with pol15_1975 in place of frac15_1975.
// Writes Table_S3.csv: Panel A (cross-sectional) replaces the file, Panel B (longitudinal,
// with the 1975 value of the outcome as a regressor) is appended.
// Column order within each panel:
//   (1)-(2) urban share, without / with controls
//   (3)-(4) primate share, full sample, without / with controls
//   (5)-(6) primate share, observations with non-missing primateshare_1975, without / with controls
forvalues panel = 1/2 {
	// Export settings that differ between the two panels
	if `panel' == 1 {
		local keepvars *pol*
		local filemode replace
	}
	else {
		local keepvars *pol* *urb* *primate*
		local filemode append
	}

	foreach dep in urbanshare primateshare {
		// The lagged outcome enters only in Panel B
		local lagdep
		if `panel' == 2 {
			local lagdep `dep'_1975
		}

		// Urban share: full sample only; primate share: full and restricted sample
		local samples full
		if "`dep'" == "primateshare" {
			local samples full restricted
		}

		foreach smp of local samples {
			local restrict
			if "`smp'" == "restricted" {
				local restrict if  primateshare_1975!=.
			}

			// 0 = without controls, 1 = with ruggedness and 1975 population density
			foreach withctrl of numlist 0/1 {
				local ctrls
				if `withctrl' == 1 {
					local ctrls ruggedness popdensity_1975
				}

				eststo: reghdfe `dep'_2015 pol15_1975 `lagdep' `ctrls' `restrict', absorb(country_id) vce(cluster country_id)
					qui estadd local Provinces = e(N)
					qui estadd local Countries = e(N_clust)
					qui estadd local Country_FE "Yes"
				if `withctrl' == 1 {
					qui estadd local Ruggedness "Yes"
					qui estadd local Popdensity1975 "Yes"
				}
			}
		}
	}

	// Export tables
	// se(%9.3f) makes esttab print standard errors in parentheses; without it esttab
	// prints t statistics, unlike the se() format requested for Table_1.csv and Table_2.csv.
	esttab using Table_S3.csv, stats(r2_a Provinces Countries Country_FE Ruggedness Popdensity1975, fmt(3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) `filemode' keep(`keepvars')
	eststo clear
}

* Table S4. Robustness to Alternative Urban Definitions
// Writes Table_S4.csv: Panel A (cross-sectional) replaces the file, Panel B (longitudinal)
// is appended. Column order within each panel: urbanshare_dense (frac, pol), then
// primateshare_FUA (frac, pol).
local polfrac frac pol
forvalues panel = 1/2 {
	// Export settings that differ between the two panels
	if `panel' == 1 {
		local keepvars *frac* *pol*
		local filemode replace
	}
	else {
		local keepvars *frac* *pol* *urb*
		local filemode append
	}

	foreach dep in urbanshare_dense primateshare_FUA {
		// 1975 variable used both as sample filter and (Panel B only) as lagged control
		if "`dep'" == "urbanshare_dense" {
			local base1975 urbanshare_dense_1975
		}
		else {
			// NOTE(review): the primateshare_FUA models use urbanshare_1975 here, not a
			// 1975 primate share -- kept as in the original; confirm this is intended.
			local base1975 urbanshare_1975
		}
		local lagdep
		if `panel' == 2 {
			local lagdep `base1975'
		}

		foreach p in `polfrac' {
			eststo: reghdfe `dep'_2015 `p'15_1975 `lagdep' ruggedness popdensity_1975 if `base1975'!=., absorb(country_id) vce(cluster country_id)
				qui estadd local Provinces = e(N)
				qui estadd local Countries = e(N_clust)
				qui estadd local Country_FE "Yes"
				qui estadd local Ruggedness "Yes"
				qui estadd local Popdensity1975 "Yes"
		}
	}

	// se(%9.3f) makes esttab print standard errors in parentheses; without it esttab
	// prints t statistics, unlike the se() format requested for Table_1.csv and Table_2.csv.
	esttab using Table_S4.csv, stats(r2_a Provinces Countries Country_FE Ruggedness Popdensity1975, fmt(3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) `filemode' keep(`keepvars') order(*frac* *pol*)
	eststo clear
}

* Table S5. Robustness of Provinces as Unit of Observation (Area-based Sample Splits)
// Writes Table_S5.csv: Panel A (cross-sectional) replaces the file, Panel B (longitudinal,
// with the 1975 value of the outcome as a regressor) is appended.
// For every split variable <name>, the models are run separately for <name>M == 0 and
// <name>M == 1. Column order per split variable: urban share (0, 1), primate share (0, 1).
// NOTE(review): the <name>M variables are presumably above/below-median indicators -- confirm.
local split ctryavg_provincearea ctryavg_area_popweighted_1975
forvalues panel = 1/2 {
	// Export settings that differ between the two panels
	if `panel' == 1 {
		local keepvars *frac*
		local filemode replace
	}
	else {
		local keepvars *frac* *urb* *primate*
		local filemode append
	}

	foreach a in `split' {
		foreach dep in urbanshare primateshare {
			// The lagged outcome enters only in Panel B
			local lagdep
			if `panel' == 2 {
				local lagdep `dep'_1975
			}

			forvalues s = 0/1 { // split sample
				eststo: reghdfe `dep'_2015 frac15_1975 `lagdep' ruggedness popdensity_1975 if `a'M ==`s', absorb(country_id) vce(cluster country_id)
					qui estadd local Provinces = e(N)
					qui estadd local Countries = e(N_clust)
					qui estadd local Country_FE "Yes"
					qui estadd local Ruggedness "Yes"
					qui estadd local Popdensity1975 "Yes"
			}
		}
	}

	// se(%9.3f) makes esttab print standard errors in parentheses; without it esttab
	// prints t statistics, unlike the se() format requested for Table_1.csv and Table_2.csv.
	esttab using Table_S5.csv, stats(r2_a Provinces Countries Country_FE Ruggedness Popdensity1975, fmt(3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) `filemode' keep(`keepvars')
	eststo clear
}

* Table S6. Robustness of Provinces as Unit of Observation (Population-based Sample Splits and number of Province per Country)
// Writes Table_S6.csv: Panel A (cross-sectional) replaces the file, Panel B (longitudinal,
// with the 1975 value of the outcome as a regressor) is appended.
// For every split variable <name>, the models are run separately for <name>M == 0 and
// <name>M == 1. Column order per split variable: urban share (0, 1), primate share (0, 1).
// NOTE(review): the <name>M variables are presumably above/below-median indicators -- confirm.
local split ctryavg_provincepop_1975 provinces_per_country
forvalues panel = 1/2 {
	// Export settings that differ between the two panels
	if `panel' == 1 {
		local keepvars *frac*
		local filemode replace
	}
	else {
		local keepvars *frac* *urb* *primate*
		local filemode append
	}

	foreach a in `split' {
		foreach dep in urbanshare primateshare {
			// The lagged outcome enters only in Panel B
			local lagdep
			if `panel' == 2 {
				local lagdep `dep'_1975
			}

			forvalues s = 0/1 { // split sample
				eststo: reghdfe `dep'_2015 frac15_1975 `lagdep' ruggedness popdensity_1975 if `a'M ==`s', absorb(country_id) vce(cluster country_id)
					qui estadd local Provinces = e(N)
					qui estadd local Countries = e(N_clust)
					qui estadd local Country_FE "Yes"
					qui estadd local Ruggedness "Yes"
					qui estadd local Popdensity1975 "Yes"
			}
		}
	}

	// se(%9.3f) makes esttab print standard errors in parentheses; without it esttab
	// prints t statistics, unlike the se() format requested for Table_1.csv and Table_2.csv.
	esttab using Table_S6.csv, stats(r2_a Provinces Countries Country_FE Ruggedness Popdensity1975, fmt(3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) `filemode' keep(`keepvars')
	eststo clear
}

* Table S7. Policy Implications: The Role of Democracy (Polarization)
// Same layout as Table 2 with pol15_1975 in place of frac15_1975.
// Writes Table_S7.csv: Panel A (cross-sectional) replaces the file, Panel B (longitudinal,
// with the 1975 value of the outcome as a regressor) is appended.
// Column order within each panel: Polity 4 (urban share, primate share),
// then Freedom House (urban share, primate share).
forvalues panel = 1/2 {
	// Export settings that differ between the two panels
	if `panel' == 1 {
		local keepvars pol15*
		local filemode replace
	}
	else {
		local keepvars pol15* urb* primate*
		local filemode append
	}

	foreach source in polity4 freedomhouse {
		// Regime variables that are multiplied with polarization
		// NOTE(review): presumably 0/1 regime-category indicators -- confirm against the codebook
		if "`source'" == "polity4" {
			local d_aut polityM033_1975
			local d_int polityM3466_1975
			local d_dem polityM67100_1975
		}
		else {
			local d_aut fh_statusDnotfree_1975
			local d_int fh_statusDpartfree_1975
			local d_dem fh_statusDfree_1975
		}

		foreach dep in urbanshare primateshare {
			// The lagged outcome enters only in Panel B
			local lagdep
			if `panel' == 2 {
				local lagdep `dep'_1975
			}

			// Interaction terms; fixed names so both regime sources share the same table rows
			qui gen pol15_autocracy_1975 = pol15_1975 * `d_aut'
			qui gen pol15_intregime_1975 = pol15_1975 * `d_int'
			qui gen pol15_democracy_1975 = pol15_1975 * `d_dem'

			eststo: reghdfe `dep'_2015 pol15_democracy_1975 pol15_intregime_1975 pol15_autocracy_1975 ruggedness popdensity_1975 `lagdep' if `dep'_1975!=., absorb(country_id) vce(cluster country_id)
				// p-values of pairwise equality tests between the three interaction coefficients
				qui test pol15_democracy_1975 = pol15_intregime_1975
				qui estadd local Ptest1 = round( r(p), .001)
				qui test pol15_intregime_1975 = pol15_autocracy_1975
				qui estadd local Ptest2 = round( r(p), .001)
				qui test pol15_democracy_1975 = pol15_autocracy_1975
				qui estadd local Ptest3 = round( r(p), .001)
				// Footer rows
				qui estadd local Provinces = e(N)
				qui estadd local Countries = e(N_clust)
				qui estadd local Country_FE "Yes"
				qui estadd local Ruggedness "Yes"
				qui estadd local Popdensity1975 "Yes"

			// Remove the interactions so they can be regenerated for the next model
			qui drop pol15_autocracy_1975 pol15_intregime_1975 pol15_democracy_1975
		}
	}

	// se(%9.3f) makes esttab print standard errors in parentheses; without it esttab
	// prints t statistics, unlike the se() format requested for Table_2.csv.
	esttab using Table_S7.csv, stats(r2_a Ptest1 Ptest2 Ptest3 Provinces Countries Country_FE Ruggedness Popdensity1975, fmt(3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) `filemode' keep(`keepvars')
	eststo clear
}

* Table S8. Ethno-linguistic Diversity and Conflict
// Writes Table_S8.csv with six columns, in this order:
//   overall          (Poisson on conflictEvents_1975_2015):              frac, pol
//   extensive margin (reghdfe on conflictBinary_1975_2015):              frac, pol
//   intensive margin (Poisson, observations with conflictEvents_1975_2015>0): frac, pol
local polfrac frac pol

// Country dummies for the Poisson models (which have no absorb() option here);
// the first dummy is dropped as the reference category.
qui tabulate country_id, generate(countryBinary)
qui drop countryBinary1

foreach margin in overall extensive intensive {
	// Only the intensive margin restricts the sample
	local restrict
	if "`margin'" == "intensive" {
		local restrict if conflictEvents_1975_2015>0
	}

	foreach p in `polfrac' {
		if "`margin'" == "extensive" {
			// Extensive margin: linear model with absorbed country fixed effects
			eststo: reghdfe conflictBinary_1975_2015 `p'15_1975 ruggedness popdensity_1975, absorb(country_id) vce(cluster country_id)
				qui estadd local Country_FE "Yes"
				qui estadd local Provinces = e(N)
				qui estadd local Countries = e(N_clust)
				qui su conflictBinary_1975_2015
		}
		else {
			// Overall / intensive margin: Poisson with explicit country dummies
			eststo: poisson conflictEvents_1975_2015 `p'15_1975 ruggedness popdensity_1975 countryBinary* `restrict', vce(cluster country_id)
				qui estadd local Country_FE "Yes"
				qui estadd local Provinces = e(N)
				qui estadd local Countries = e(N_clust)
				qui estadd local r2_ps = round(e(r2_p), .001)
				// Mean of the dependent variable over the same sample restriction as the model
				qui su conflictEvents_1975_2015 `restrict'
		}
		// r(mean) comes from the summarize call issued just above
		qui estadd local devarmean = round(r(mean), .001)
		qui estadd local Ruggedness "Yes"
		qui estadd local Popdensity1975 "Yes"
	}
}

// se(%9.3f) makes esttab print standard errors in parentheses; without it esttab
// prints t statistics, unlike the se() format requested for Table_1.csv and Table_2.csv.
esttab using Table_S8.csv, stats(devarmean r2_a r2_ps Provinces Countries Country_FE Ruggedness Popdensity1975, fmt(3 3 0)) starlevels(* .1 ** .05 *** .01) b(%9.3f) se(%9.3f) replace keep(*frac* *pol*)
eststo clear

/****************************************  Figures ****************************************/

* Figure S1: Degree of Urbanization in Europe, 2015
// Created in ArcGIS based on GHS data

* Figure S2. Ethnologue Language Tree for Switzerland.
// Manually generated based on data from the Ethnologue website

* Figure S3: Global Map of Ethno-Linguistic Fractionalization (Tree Level 1) at the Province Level
// Created in ArcGIS

* Figure S4: Ethno-linguistic Polarization and Fractionalization
// Scatter plot with linear fit of polarization against fractionalization (both level 15).
// Panel A uses the raw variables, panel B the variables with the _demean suffix.
foreach panel in A B {
	local sfx
	if "`panel'" == "B" {
		local sfx _demean
	}

	scatter pol15_1975`sfx' frac15_1975`sfx' || lfit pol15_1975`sfx' frac15_1975`sfx' ///
		, xtitle("Fractionalization (level 15)") ytitle("Polarization (level 15)") ///
		xtitle(, size(large)) ytitle(, size(large)) xlabel(,labsize(medlarge)) ylabel(,labsize(medlarge)) ///
		legend(off) graphregion(color(white)) bgcolor(white)
	qui graph export Figure_S4`panel'.pdf, replace
}
